import json
import os
import re
from selenium import webdriver
from bs4 import BeautifulSoup
import requests


class Get_Maoyan():
    """Scrape film names and poster URLs from Maoyan movie listing pages.

    Workflow: collect cookies with a headless browser, replay them through a
    ``requests`` session, parse each listing page with BeautifulSoup, and
    append one ``name<TAB>poster_url`` line per film to the file
    ``film_info``.  Intermediate work files (``cookie``, ``html``) are
    removed when the crawl finishes.
    """

    def __init__(self, url, head):
        # url:  first listing page; must end with an ``offset=<n>`` query,
        #       which run() increments to advance through pages.
        # head: dict of HTTP headers sent with every request (User-Agent etc.).
        self.url = url
        self.head = head

    def get_cookie(self):
        """Visit the start URL in PhantomJS and dump its cookies to ``cookie``.

        NOTE(review): PhantomJS support was removed from recent Selenium
        releases; switch to headless Chrome/Firefox when upgrading Selenium.
        """
        driver = webdriver.PhantomJS()
        try:
            driver.get(self.url)
            # Collapse the list of cookie dicts into a simple name -> value map.
            cookie = {c['name']: c['value'] for c in driver.get_cookies()}
        finally:
            # quit() (not close()) shuts down the whole browser process;
            # close() alone leaks the PhantomJS process on every run.
            driver.quit()
        with open('cookie', 'w') as f:
            f.write(json.dumps(cookie))

    def send_request(self, url, cookie):
        """GET *url* with the saved cookie string and cache the body in ``html``.

        cookie: JSON-encoded name -> value mapping, as written by get_cookie().
        """
        session = requests.session()
        requests.utils.add_dict_to_cookiejar(session.cookies, json.loads(cookie))
        response = session.get(url, headers=self.head)
        ret = response.content.decode('utf8')
        with open('html', 'w', encoding='utf-8') as f:
            f.write(ret)

    def save_item(self, film_name, film_poster):
        """Append one tab-separated "name TAB poster" record to ``film_info``."""
        with open('film_info', 'a+', encoding='utf-8') as f:
            f.write(f'{film_name}\t{film_poster}\n')

    def run(self):
        """Crawl page by page, saving every film, until no next-page link exists."""
        self.get_cookie()
        with open('cookie', 'r') as f:
            cookie = f.readline().strip()
        url = self.url
        while True:
            self.send_request(url, cookie)
            with open('html', 'r', encoding='utf-8') as f:
                html = f.read()
            soup = BeautifulSoup(html, 'lxml')
            # Poster <img> tags carry both a data-src (image URL) and an alt
            # (film title suffixed with a "poster cover" label).
            posters = soup.find_all(attrs={"data-src": re.compile(r".*")},
                                    alt=re.compile(r".*"))
            for tag in posters:
                film_name = tag['alt'].replace('海报封面', '')
                # Ask the CDN for the high-resolution poster variant.
                film_poster = re.sub(r'\d*w_\d*h', '2080w_2920h', tag['data-src'])
                self.save_item(film_name, film_poster)
            # The "next page" anchor sits in the 5th pager item; it is absent
            # on the last page.  (Renamed from `next`, which shadowed the builtin.)
            next_link = soup.select('#app > div > div.movies-panel > div.movies-pager > ul > li:nth-child(5) > a')
            if next_link:
                # Advance the trailing offset=<n> query by one page (30 films).
                url = re.sub(r'\d+$', str(int(re.search(r'\d+$', url).group()) + 30), url)
            else:
                # Done: remove work files; tolerate ones that never appeared
                # (e.g. ghostdriver.log is only created by some Selenium setups).
                for leftover in ('html', 'ghostdriver.log', 'cookie'):
                    try:
                        os.remove(leftover)
                    except FileNotFoundError:
                        pass
                break


if __name__ == '__main__':
    # Entry point: crawl the Maoyan listing starting at offset 0.
    start_url = 'https://maoyan.com/films?showType=2&offset=0'
    headers = {
        "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                       "AppleWebKit/537.36 (KHTML, like Gecko) "
                       "Chrome/77.0.3865.120 Safari/537.36"),
        "Referer": "https://maoyan.com/",
    }
    scraper = Get_Maoyan(start_url, headers)
    scraper.run()
